library(miloR)
library(Seurat)
library(SingleCellExperiment)
library(scater)
library(scuttle)
library(scran)
library(org.Hs.eg.db)
library(org.Mm.eg.db)
library(clusterProfiler)
library(msigdbr)
library(dplyr)
library(ggplot2)
library(patchwork)
library(ggpubr)
theme_set(theme_bw())
We will simply read the files we created with the combined seurat objects from human and mouse cells.
hg19_scRNA <- readRDS("data/R_objects/hg19_RNA.rds")
mm10_scRNA <- readRDS("data/R_objects/mm10_RNA.rds")
We will now add metadata columns indicating the subtype of breast cancer and whether the tumor is resistant or sensitive to chemotherapy
hg19_scRNA@meta.data <- hg19_scRNA@meta.data %>%
mutate(subtype=case_when(
orig.ident=="HBCx-95"| orig.ident=="HBCx-95_CAPAR"~"Triple_negative",
orig.ident=="HBCx-22"| orig.ident=="HBCx22-TAMR"~"Luminal")) %>%
mutate(drug_resistance=case_when(
grepl("R", orig.ident)==T~"Resistant",
grepl("R", orig.ident)==F~"Sensitive"),
sample=rownames(.))
mm10_scRNA@meta.data <- mm10_scRNA@meta.data %>%
mutate(subtype=case_when(
orig.ident=="HBCx-95"| orig.ident=="HBCx-95_CAPAR"~"Triple_negative",
orig.ident=="HBCx-22"| orig.ident=="HBCx22-TAMR"~"Luminal")) %>%
mutate(drug_resistance=case_when(
grepl("R", orig.ident)==T~"Resistant",
grepl("R", orig.ident)==F~"Sensitive"),
sample=rownames(.))
# Create object that can store the KNN graph as a reduced dimension
hg19_scRNA_milo <- as.SingleCellExperiment(hg19_scRNA)
hg19_scRNA_milo <- Milo(hg19_scRNA_milo)
# Same for mouse
mm10_scRNA_milo <- as.SingleCellExperiment(mm10_scRNA)
mm10_scRNA_milo <- Milo(mm10_scRNA_milo)
hg19_scRNA_milo <- buildGraph(hg19_scRNA_milo, k=20, d=30)
## Constructing kNN graph with k:20
mm10_scRNA_milo <- buildGraph(mm10_scRNA_milo, k=30, d=30)
## Constructing kNN graph with k:30
hg19_scRNA_milo <- makeNhoods(hg19_scRNA_milo, prop = 0.2, k = 20, d=30, refined = TRUE)
## Checking valid object
## Running refined sampling with reduced_dim
plotNhoodSizeHist(hg19_scRNA_milo)
mm10_scRNA_milo <- makeNhoods(mm10_scRNA_milo, prop = 0.2, k = 30, d=30, refined = TRUE)
## Checking valid object
## Running refined sampling with reduced_dim
plotNhoodSizeHist(mm10_scRNA_milo)
Note that we have selected k for constructing the graph and defining neighbours to achieve a distribution peak between 50 and 100
hg19_scRNA_milo <- countCells(hg19_scRNA_milo, meta.data = data.frame(colData(hg19_scRNA_milo)), sample="orig.ident")
## Checking meta.data validity
## Counting cells in neighbourhoods
mm10_scRNA_milo <- countCells(mm10_scRNA_milo, meta.data = data.frame(colData(mm10_scRNA_milo)), sample="orig.ident")
## Checking meta.data validity
## Counting cells in neighbourhoods
In this step we will perform a statistical test to find populations that are significantly more or less abundant in one group in comparison to another.
# Create design matrix
design_hg19 <- data.frame(colData(hg19_scRNA_milo))[,c("subtype", "drug_resistance","orig.ident")]
design_hg19 <- distinct(design_hg19)
rownames(design_hg19) <- design_hg19[,"orig.ident"]
design_hg19
## subtype drug_resistance orig.ident
## HBCx-95 Triple_negative Sensitive HBCx-95
## HBCx-95_CAPAR Triple_negative Resistant HBCx-95_CAPAR
## HBCx-22 Luminal Sensitive HBCx-22
## HBCx22-TAMR Luminal Resistant HBCx22-TAMR
# Calculate distances
hg19_scRNA_milo <- calcNhoodDistance(hg19_scRNA_milo, d=30)
## as(<dgTMatrix>, "dgCMatrix") is deprecated since Matrix 1.5-0; do as(., "CsparseMatrix") instead
# Perform the statistical test
comparison_subtype <- testNhoods(hg19_scRNA_milo, design = ~ drug_resistance+subtype, design.df = design_hg19)
## Using TMM normalisation
## Performing spatial FDR correction withk-distance weighting
comparison_resistance <- testNhoods(hg19_scRNA_milo, design = ~subtype + drug_resistance, design.df = design_hg19)
## Using TMM normalisation
## Performing spatial FDR correction withk-distance weighting
# Create design matrix
design_mm10 <- data.frame(colData(mm10_scRNA_milo))[,c("subtype", "drug_resistance","orig.ident")]
design_mm10 <- distinct(design_mm10)
rownames(design_mm10) <- design_mm10[,"orig.ident"]
design_mm10
## subtype drug_resistance orig.ident
## HBCx-95 Triple_negative Sensitive HBCx-95
## HBCx-95_CAPAR Triple_negative Resistant HBCx-95_CAPAR
## HBCx-22 Luminal Sensitive HBCx-22
## HBCx22-TAMR Luminal Resistant HBCx22-TAMR
# Calculate distances
mm10_scRNA_milo <- calcNhoodDistance(mm10_scRNA_milo, d=30)
# Perform the statistical test
comparison_subtype_mm10 <- testNhoods(mm10_scRNA_milo, design = ~ drug_resistance+subtype, design.df = design_mm10)
## Using TMM normalisation
## Performing spatial FDR correction withk-distance weighting
comparison_resistance_mm10 <- testNhoods(mm10_scRNA_milo, design = ~ subtype+drug_resistance, design.df = design_mm10)
## Using TMM normalisation
## Performing spatial FDR correction withk-distance weighting
F1_A <- ggplot(comparison_subtype, aes(PValue)) + geom_histogram(bins=50) + ggtitle("P-value distribution for contrasts by subtype")
F1_B <- ggplot(comparison_resistance, aes(PValue)) + geom_histogram(bins=50) + ggtitle("P-value distribution for contrasts by drug sensitivity")
ggarrange(F1_A, F1_B)
F4_A <-ggplot(comparison_subtype_mm10, aes(PValue)) + geom_histogram(bins=50) + ggtitle("P-value distribution for contrasts by subtype")
F4_B <-ggplot(comparison_resistance_mm10, aes(PValue)) + geom_histogram(bins=50) + ggtitle("P-value distribution for contrasts by drug sensitivity")
ggarrange(F4_A, F4_B)
ggplot(comparison_subtype, aes(logFC, -log10(SpatialFDR))) +
geom_point() +
geom_hline(yintercept = 1)
ggplot(comparison_resistance, aes(logFC, -log10(SpatialFDR))) +
geom_point() +
geom_hline(yintercept = 1)
ggplot(comparison_subtype_mm10, aes(logFC, -log10(SpatialFDR))) +
geom_point() +
geom_hline(yintercept = 1)
ggplot(comparison_resistance_mm10, aes(logFC, -log10(SpatialFDR))) +
geom_point() +
geom_hline(yintercept = 1)
hg19_scRNA_milo <- buildNhoodGraph(hg19_scRNA_milo)
## Plot single-cell UMAP
umap_hg19 <- plotReducedDim(hg19_scRNA_milo, dimred = "UMAP", colour_by="cluster_name", text_by = "cluster_name", text_size = 8) + NoLegend()
## Plot neighbourhood graph
nh_graph_hg19 <- plotNhoodGraphDA(hg19_scRNA_milo, comparison_subtype, layout="UMAP",alpha=0.05)
nh_graph_resistance_hg19 <- plotNhoodGraphDA(hg19_scRNA_milo, comparison_resistance, layout="UMAP",alpha=0.05)
comparison_subtype <- annotateNhoods(hg19_scRNA_milo, comparison_subtype, coldata_col = "cluster_name")
head(comparison_subtype)
## logFC logCPM F PValue FDR Nhood SpatialFDR
## 1 -0.9888326 11.87918 0.55462155 0.45705563 0.9655209 1 0.9677191
## 2 -0.4101012 12.60643 0.43488617 0.51013924 0.9784163 2 0.9755292
## 3 0.3058800 11.90601 0.06314886 0.80176955 0.9784163 3 0.9755292
## 4 2.3094264 11.81942 3.41015312 0.06584408 0.4220007 4 0.4388231
## 5 -0.4162361 12.12443 0.14770026 0.70103247 0.9784163 5 0.9755292
## 6 0.2553400 12.04828 0.04829574 0.82621480 0.9784163 6 0.9755292
## cluster_name cluster_name_fraction
## 1 Helper-T-Cells 1 0.8630137
## 2 Helper-T-Cells 1 1.0000000
## 3 Fibroblasts 1.0000000
## 4 Fibroblasts 0.9850746
## 5 Fibroblasts 0.9701493
## 6 NK cells 0.9367089
beeswarm_hg19 <- plotDAbeeswarm(comparison_subtype, group.by = "cluster_name") + theme(text = element_text(size = 15))
## Converting group.by to factor...
comparison_resistance <- annotateNhoods(hg19_scRNA_milo, comparison_resistance, coldata_col = "cluster_name")
head(comparison_resistance)
## logFC logCPM F PValue FDR Nhood SpatialFDR
## 1 -0.5388183 11.87918 0.16685978 0.6832278 0.9986069 1 0.9986069
## 2 -0.2814023 12.60643 0.20513737 0.6509540 0.9986069 2 0.9986069
## 3 -0.2723636 11.90601 0.05018627 0.8229017 0.9986069 3 0.9986069
## 4 0.7341273 11.81942 0.36115435 0.5483486 0.9986069 4 0.9986069
## 5 0.1369383 12.12443 0.01623871 0.8986899 0.9986069 5 0.9986069
## 6 0.1704957 12.04828 0.02157892 0.8833178 0.9986069 6 0.9986069
## cluster_name cluster_name_fraction
## 1 Helper-T-Cells 1 0.8630137
## 2 Helper-T-Cells 1 1.0000000
## 3 Fibroblasts 1.0000000
## 4 Fibroblasts 0.9850746
## 5 Fibroblasts 0.9701493
## 6 NK cells 0.9367089
beeswarm_drug_r_hg19 <- plotDAbeeswarm(comparison_resistance, group.by = "cluster_name") + theme(text = element_text(size = 17))
## Converting group.by to factor...
ggarrange(umap_hg19,nh_graph_hg19,beeswarm_hg19, nrow=1, common.legend=F, legend="right")
ggarrange(umap_hg19,nh_graph_resistance_hg19, nrow=1, common.legend=F, legend="right")
Now we’ll do the same for mouse cells
mm10_scRNA_milo <- buildNhoodGraph(mm10_scRNA_milo)
## Plot single-cell UMAP
umap_mm10 <- plotReducedDim(mm10_scRNA_milo, dimred = "UMAP", colour_by="cluster_name", text_by = "cluster_name", text_size = 8) +
guides(fill="none")
## Plot neighbourhood graph
nh_graph_mm10 <- plotNhoodGraphDA(mm10_scRNA_milo, comparison_subtype, layout="UMAP",alpha=0.05)
nh_graph_resistance_mm10 <- plotNhoodGraphDA(mm10_scRNA_milo, comparison_resistance, layout="UMAP",alpha=0.05)
comparison_subtype_mm10 <- annotateNhoods(mm10_scRNA_milo, comparison_subtype_mm10, coldata_col = "cluster_name")
head(comparison_subtype_mm10)
## logFC logCPM F PValue FDR Nhood SpatialFDR
## 1 -1.3409163 10.79332 2.02919644 0.155017519 0.6292723 1 0.6299447
## 2 2.9603631 10.91700 7.39373190 0.006870605 0.2304295 2 0.2610097
## 3 1.8937498 11.11692 3.90443508 0.048788812 0.4653760 3 0.4707781
## 4 1.2934908 11.60930 1.65334336 0.199186966 0.6789249 4 0.6759204
## 5 0.1090573 11.79476 0.01461196 0.903841811 0.9605876 5 0.9517545
## 6 0.8010234 11.78570 0.78005982 0.377609564 0.7622119 6 0.7353875
## cluster_name cluster_name_fraction
## 1 Fibroblasts 1 0.9782609
## 2 Macrophages 1 0.9545455
## 3 Macrophages 1 0.9629630
## 4 Macrophages 1 1.0000000
## 5 Endothelial cells 1.0000000
## 6 Fibroblasts 4 1.0000000
beeswarm_mm10 <- plotDAbeeswarm(comparison_subtype_mm10, group.by = "cluster_name") + theme(text = element_text(size = 15))
## Converting group.by to factor...
comparison_resistance_mm10 <- annotateNhoods(mm10_scRNA_milo, comparison_resistance_mm10, coldata_col = "cluster_name")
head(comparison_resistance_mm10)
## logFC logCPM F PValue FDR Nhood SpatialFDR
## 1 1.9347880 10.79332 4.1911020 0.04123552 0.6420960 1 0.6804031
## 2 -1.6163868 10.91700 2.3002897 0.13025045 0.6928377 2 0.7046129
## 3 -0.6384238 11.11692 0.4578582 0.49898545 0.8131859 3 0.8163237
## 4 -1.7584227 11.60930 2.9733330 0.08535607 0.6928377 4 0.7046129
## 5 1.0969756 11.79476 1.4900876 0.22286261 0.6928377 5 0.7091971
## 6 -0.8465461 11.78570 0.8726003 0.35075348 0.7682516 6 0.7814215
## cluster_name cluster_name_fraction
## 1 Fibroblasts 1 0.9782609
## 2 Macrophages 1 0.9545455
## 3 Macrophages 1 0.9629630
## 4 Macrophages 1 1.0000000
## 5 Endothelial cells 1.0000000
## 6 Fibroblasts 4 1.0000000
beeswarm_drug_r_mm10 <- plotDAbeeswarm(comparison_resistance_mm10, group.by = "cluster_name") + theme(text = element_text(size = 17))
## Converting group.by to factor...
ggarrange(umap_mm10,nh_graph_mm10,beeswarm_mm10, nrow=1, common.legend=F, legend="right")
ggarrange(umap_mm10,nh_graph_resistance_mm10, nrow=1, common.legend=F, legend="right")
We will create a new object because this pseudobulk methodoloy requires raw counts rather than normalized, scaled ones.
metadata_hg19 <- hg19_scRNA@meta.data
metadata_hg19$cluster_name <- factor(hg19_scRNA@active.ident)
hg19_scRNA_pseudo <- SingleCellExperiment(assays=list(counts=hg19_scRNA@assays$RNA@counts),
colData=metadata_hg19)
summed_counts_hg19 <- aggregateAcrossCells(hg19_scRNA_pseudo,
ids=colData(hg19_scRNA_pseudo)[,c("cluster_name","drug_resistance","subtype")], use.assay.type="counts")
summed_counts_hg19
## class: SingleCellExperiment
## dim: 32738 20
## metadata(0):
## assays(1): counts
## rownames(32738): MIR1302-10 FAM138A ... AC002321.2 AC002321.1
## rowData names(0):
## colnames: NULL
## colData names(15): orig.ident nCount_RNA ... subtype ncells
## reducedDimNames(0):
## mainExpName: NULL
## altExpNames(0):
# Remove combinations with few cells
summed_counts_filt_hg19 <- summed_counts_hg19[,summed_counts_hg19$ncells>=10]
# We will use this wrapper function from the scatter package, that combines all steps required for differential expression analysis with edgeR
DE_subtype_hg19 <- pseudoBulkDGE(summed_counts_filt_hg19,
label=summed_counts_filt_hg19$cluster_name,
design=~drug_resistance+subtype, # Effect of subtype, accounting for drug resistance
coef="subtypeTriple_negative",
condition=summed_counts_filt_hg19$subtype
)
DE_subtype_hg19_genes <- list()
for (i in 1:length(DE_subtype_hg19)){
filtered_df <- DE_subtype_hg19[[i]] %>%
as.data.frame() %>%
#dplyr::filter(abs(logFC)>1 & FDR<0.01) %>%
mutate(cluster_name=names(DE_subtype_hg19)[i],
gene=rownames(.))
DE_subtype_hg19_genes[[i]] <- filtered_df
}
# Same for drug resistance effect
DE_drug_r_hg19 <- pseudoBulkDGE(summed_counts_filt_hg19,
label=summed_counts_filt_hg19$cluster_name,
design=~subtype + drug_resistance, # Effect drug resistance, accounting for subtype
coef="drug_resistanceSensitive",
condition=summed_counts_filt_hg19$drug_resistance
)
DE_drug_r_hg19_genes <- list()
for (i in 1:length(DE_drug_r_hg19)){
filtered_df <- DE_drug_r_hg19[[i]] %>%
as.data.frame() %>%
#dplyr::filter(abs(logFC)>0.5 & FDR<0.05) %>%
mutate(cluster_name=names(DE_drug_r_hg19)[i],
gene=rownames(.))
DE_drug_r_hg19_genes[[i]] <- filtered_df
}
volcano_colors <- c("cornflowerblue", "darkred","grey")
names(volcano_colors) <- c("DOWN", "UP", "NO")
plots_subtype_hg19 <- list()
for (i in 1:length(DE_subtype_hg19_genes)){
df = DE_subtype_hg19_genes[[i]]
df[is.na(df)] <- 0
df$diffexpressed <- "NO"
df$diffexpressed[df$logFC > 1 & df$FDR < 0.01] <- "UP"
df$diffexpressed[df$logFC < -1 & df$FDR < 0.01] <- "DOWN"
p <- ggplot(data=df, aes(x=logFC, y=-log10(FDR), col=diffexpressed)) +
geom_point() +
theme_minimal() +
scale_color_manual(values=volcano_colors) +
ggtitle(paste0(unique(df$cluster_name))) +
ylim(0,50)
plots_subtype_hg19[[i]] <-p
}
legend_volcano <- get_legend(plots_subtype_hg19[[1]])
ggarrange(plotlist=plots_subtype_hg19, legend.grob = legend_volcano, nrow = 1, legend="right")
plots_resistance_hg19 <- list()
for (i in 1:length(DE_drug_r_hg19_genes)){
df = DE_drug_r_hg19_genes[[i]]
df[is.na(df)] <- 0
df$diffexpressed <- "NO"
df$diffexpressed[df$logFC > 1 & df$FDR < 0.01] <- "UP"
df$diffexpressed[df$logFC < -1 & df$FDR < 0.01] <- "DOWN"
p <- ggplot(data=df, aes(x=logFC, y=-log10(FDR), col=diffexpressed)) +
geom_point() +
theme_minimal() +
scale_color_manual(values=volcano_colors) +
ggtitle(paste0(unique(df$cluster_name))) +
ylim(0,10)
plots_resistance_hg19[[i]] <-p
}
ggarrange(plotlist=plots_resistance_hg19, legend.grob = legend_volcano, nrow = 1, legend="right")
Let’s perform a functional enrichment analysis to see the functions these genes participate in:
plot_GO <- function(df, db){
df <- df %>% dplyr::filter(abs(logFC)>1 & FDR<0.01)
# Calculate enrichment
enrichment <- enrichGO(gene = df$gene,
OrgDb = db,
keyType = "SYMBOL",
ont = "BP",
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.05)
enrichment <- clusterProfiler::simplify(enrichment, cutoff=0.7, by="p.adjust", select_fun=min)
GO_ggdata <- enrichment %>%
as_data_frame() %>%
arrange(Count)
GO_ggdata$Description <- factor(GO_ggdata$Description, levels = GO_ggdata$Description)
print(tail(GO_ggdata, n=20L))
ggplot(GO_ggdata, aes(x = Description, y = Count, fill = p.adjust)) +
geom_bar(stat = "identity") +
scale_colour_viridis_d(begin=0,end=1) +
coord_flip() +
ylab("Number of genes") +
xlab("GO Terms") +
theme(axis.text.y = element_text(size=10)) +
ggtitle(paste0("GO enrichment in", unique(df$cluster_name)))
}
# lapply(DE_drug_r_hg19_genes, function(x){plot_GO(df=x, db="org.Hs.eg.db")})
# There is a much smaller number of DE so the enrichment is non significant
lapply(DE_subtype_hg19_genes, function(x){plot_GO(df=x, db="org.Hs.eg.db")})
## Warning: `as_data_frame()` was deprecated in tibble 2.0.0.
## ℹ Please use `as_tibble()` instead.
## ℹ The signature and semantics have changed, see `?as_tibble`.
## # A tibble: 20 Ă— 9
## ID Descript…¹ GeneR…² BgRatio pvalue p.adjust qvalue geneID Count
## <chr> <fct> <chr> <chr> <dbl> <dbl> <dbl> <chr> <int>
## 1 GO:0044089 positive … 40/820 498/18… 1.41e- 4 3.64e- 3 2.88e- 3 CLSTN… 40
## 2 GO:0040013 negative … 41/820 419/18… 1.08e- 6 8.53e- 5 6.73e- 5 IL24/… 41
## 3 GO:0051960 regulatio… 41/820 456/18… 8.87e- 6 4.25e- 4 3.36e- 4 CLSTN… 41
## 4 GO:0001503 ossificat… 42/820 429/18… 7.86e- 7 6.63e- 5 5.23e- 5 ID3/R… 42
## 5 GO:0052547 regulatio… 42/820 459/18… 4.47e- 6 2.51e- 4 1.98e- 4 CASP9… 42
## 6 GO:0050678 regulatio… 44/820 414/18… 4.14e- 8 7.21e- 6 5.69e- 6 ERRFI… 44
## 7 GO:0048732 gland dev… 44/820 441/18… 2.54e- 7 2.76e- 5 2.18e- 5 WLS/T… 44
## 8 GO:0010975 regulatio… 45/820 446/18… 1.34e- 7 1.80e- 5 1.42e- 5 EPHB2… 45
## 9 GO:0050878 regulatio… 46/820 390/18… 7.46e-10 3.90e- 7 3.08e- 7 EPHB2… 46
## 10 GO:0016049 cell grow… 46/820 493/18… 9.18e- 7 7.50e- 5 5.92e- 5 CLSTN… 46
## 11 GO:0030198 extracell… 48/820 318/18… 3.84e-14 7.22e-11 5.70e-11 VWA1/… 48
## 12 GO:0043062 extracell… 48/820 319/18… 4.34e-14 7.22e-11 5.70e-11 VWA1/… 48
## 13 GO:0045229 external … 48/820 321/18… 5.53e-14 7.22e-11 5.70e-11 VWA1/… 48
## 14 GO:0032102 negative … 48/820 436/18… 3.18e- 9 1.28e- 6 1.01e- 6 SMPDL… 48
## 15 GO:0031589 cell-subs… 49/820 369/18… 2.86e-12 2.37e- 9 1.87e- 9 LAMB3… 49
## 16 GO:0001655 urogenita… 51/820 360/18… 7.91e-14 8.27e-11 6.53e-11 CASP9… 51
## 17 GO:0050673 epithelia… 51/820 481/18… 3.70e- 9 1.38e- 6 1.09e- 6 ERRFI… 51
## 18 GO:0045785 positive … 51/820 484/18… 4.57e- 9 1.59e- 6 1.26e- 6 RUNX3… 51
## 19 GO:0001667 ameboidal… 55/820 492/18… 1.23e-10 8.07e- 8 6.37e- 8 S100A… 55
## 20 GO:0042060 wound hea… 64/820 442/18… 1.54e-17 8.03e-14 6.34e-14 EPHB2… 64
## # … with abbreviated variable names ¹​Description, ²​GeneRatio
## # A tibble: 20 Ă— 9
## ID Description GeneR…¹ BgRatio pvalue p.adj…² qvalue geneID Count
## <chr> <fct> <chr> <chr> <dbl> <dbl> <dbl> <chr> <int>
## 1 GO:0045104 intermediate… 10/352 89/189… 6.09e-6 1.93e-3 1.59e-3 VIM/K… 10
## 2 GO:0045103 intermediate… 10/352 90/189… 6.74e-6 1.98e-3 1.63e-3 VIM/K… 10
## 3 GO:0048661 positive reg… 10/352 98/189… 1.45e-5 3.72e-3 3.06e-3 IGFBP… 10
## 4 GO:0042303 molting cycle 10/352 115/18… 5.82e-5 8.27e-3 6.81e-3 TGFB2… 10
## 5 GO:0042633 hair cycle 10/352 115/18… 5.82e-5 8.27e-3 6.81e-3 TGFB2… 10
## 6 GO:0007584 response to … 12/352 154/18… 3.23e-5 5.13e-3 4.22e-3 CYP1B… 12
## 7 GO:0002526 acute inflam… 13/352 114/18… 2.07e-7 2.66e-4 2.19e-4 S100A… 13
## 8 GO:0007568 aging 13/352 169/18… 1.75e-5 4.01e-3 3.30e-3 CASP9… 13
## 9 GO:0071695 anatomical s… 16/352 250/18… 1.97e-5 4.27e-3 3.51e-3 TGFB2… 16
## 10 GO:0021700 developmenta… 18/352 304/18… 1.73e-5 4.01e-3 3.30e-3 NFIA/… 18
## 11 GO:0070372 regulation o… 18/352 315/18… 2.77e-5 4.57e-3 3.76e-3 ERRFI… 18
## 12 GO:0032496 response to … 19/352 339/18… 2.18e-5 4.45e-3 3.66e-3 CASP9… 19
## 13 GO:0070371 ERK1 and ERK… 19/352 340/18… 2.27e-5 4.45e-3 3.66e-3 ERRFI… 19
## 14 GO:0043588 skin develop… 21/352 302/18… 2.58e-7 2.66e-4 2.19e-4 ERRFI… 21
## 15 GO:0001655 urogenital s… 22/352 360/18… 1.20e-6 9.00e-4 7.40e-4 CASP9… 22
## 16 GO:0032102 negative reg… 22/352 436/18… 2.52e-5 4.57e-3 3.76e-3 DUSP1… 22
## 17 GO:0042060 wound healing 23/352 442/18… 1.01e-5 2.77e-3 2.27e-3 SPRR3… 23
## 18 GO:0008544 epidermis de… 25/352 362/18… 2.09e-8 4.30e-5 3.54e-5 ERRFI… 25
## 19 GO:0050673 epithelial c… 26/352 481/18… 1.31e-6 9.00e-4 7.40e-4 ERRFI… 26
## 20 GO:0045785 positive reg… 31/352 484/18… 2.43e-9 1.00e-5 8.25e-6 TGFB2… 31
## # … with abbreviated variable names ¹​GeneRatio, ²​p.adjust
## # A tibble: 20 Ă— 9
## ID Description GeneR…¹ BgRatio pvalue p.adj…² qvalue geneID Count
## <chr> <fct> <chr> <chr> <dbl> <dbl> <dbl> <chr> <int>
## 1 GO:0032651 regulation o… 13/411 111/18… 8.77e-7 6.71e-4 5.72e-4 ERRFI… 13
## 2 GO:0022612 gland morpho… 13/411 123/18… 2.83e-6 1.22e-3 1.04e-3 IGFBP… 13
## 3 GO:0007584 response to … 14/411 154/18… 7.04e-6 2.01e-3 1.72e-3 CYP1B… 14
## 4 GO:0048469 cell maturat… 15/411 187/18… 1.52e-5 3.44e-3 2.93e-3 EPAS1… 15
## 5 GO:0007565 female pregn… 16/411 189/18… 3.99e-6 1.43e-3 1.22e-3 RGS2/… 16
## 6 GO:0044706 multi-multic… 16/411 218/18… 2.41e-5 4.07e-3 3.47e-3 RGS2/… 16
## 7 GO:0045444 fat cell dif… 17/411 244/18… 2.64e-5 4.07e-3 3.47e-3 RGS2/… 17
## 8 GO:0071695 anatomical s… 18/411 250/18… 9.84e-6 2.48e-3 2.12e-3 EPAS1… 18
## 9 GO:0043588 skin develop… 19/411 302/18… 3.71e-5 5.08e-3 4.34e-3 ERRFI… 19
## 10 GO:0070372 regulation o… 20/411 315/18… 2.04e-5 3.80e-3 3.24e-3 ERRFI… 20
## 11 GO:0001933 negative reg… 20/411 338/18… 5.52e-5 6.23e-3 5.31e-3 ERRFI… 20
## 12 GO:0032496 response to … 21/411 339/18… 1.84e-5 3.80e-3 3.24e-3 PDE4B… 21
## 13 GO:0070371 ERK1 and ERK… 21/411 340/18… 1.92e-5 3.80e-3 3.24e-3 ERRFI… 21
## 14 GO:0021700 developmenta… 22/411 304/18… 9.39e-7 6.71e-4 5.72e-4 CLSTN… 22
## 15 GO:0042326 negative reg… 22/411 381/18… 3.42e-5 5.06e-3 4.31e-3 ERRFI… 22
## 16 GO:0050727 regulation o… 23/411 414/18… 4.15e-5 5.08e-3 4.34e-3 CAMK2… 23
## 17 GO:0008544 epidermis de… 24/411 362/18… 1.44e-6 8.13e-4 6.93e-4 ERRFI… 24
## 18 GO:0042060 wound healing 24/411 442/18… 4.08e-5 5.08e-3 4.34e-3 S100A… 24
## 19 GO:2001233 regulation o… 27/411 374/18… 5.66e-8 2.43e-4 2.07e-4 S100A… 27
## 20 GO:0045785 positive reg… 29/411 484/18… 9.31e-7 6.71e-4 5.72e-4 DUSP1… 29
## # … with abbreviated variable names ¹​GeneRatio, ²​p.adjust
## # A tibble: 20 Ă— 9
## ID Description GeneR…¹ BgRatio pvalue p.adj…² qvalue geneID Count
## <chr> <fct> <chr> <chr> <dbl> <dbl> <dbl> <chr> <int>
## 1 GO:0062012 regulation … 27/701 339/18… 1.71e- 4 9.45e-3 7.93e-3 ARNT/… 27
## 2 GO:0002181 cytoplasmic… 29/701 161/18… 1.37e-12 6.95e-9 5.83e-9 RPS8/… 29
## 3 GO:0032496 response to… 29/701 339/18… 2.76e- 5 2.91e-3 2.44e-3 PDE4B… 29
## 4 GO:0008544 epidermis d… 29/701 362/18… 8.98e- 5 5.99e-3 5.02e-3 ERRFI… 29
## 5 GO:0050727 regulation … 31/701 414/18… 1.78e- 4 9.69e-3 8.13e-3 S100A… 31
## 6 GO:0006631 fatty acid … 32/701 400/18… 4.05e- 5 3.80e-3 3.19e-3 SCP2/… 32
## 7 GO:0050678 regulation … 32/701 414/18… 7.81e- 5 5.42e-3 4.55e-3 ERRFI… 32
## 8 GO:0043434 response to… 32/701 421/18… 1.07e- 4 6.76e-3 5.67e-3 ERRFI… 32
## 9 GO:0048545 response to… 33/701 339/18… 4.64e- 7 1.96e-4 1.64e-4 ERRFI… 33
## 10 GO:0031589 cell-substr… 33/701 369/18… 3.05e- 6 9.65e-4 8.10e-4 LAMB3… 33
## 11 GO:0001666 response to… 34/701 296/18… 5.31e- 9 8.97e-6 7.52e-6 ARNT/… 34
## 12 GO:0007159 leukocyte c… 34/701 414/18… 1.36e- 5 2.16e-3 1.81e-3 TNFRS… 34
## 13 GO:0062197 cellular re… 35/701 345/18… 7.60e- 8 4.28e-5 3.59e-5 ERRFI… 35
## 14 GO:0052547 regulation … 35/701 459/18… 4.82e- 5 4.36e-3 3.66e-3 DHCR2… 35
## 15 GO:0045936 negative re… 36/701 440/18… 8.31e- 6 1.83e-3 1.53e-3 ERRFI… 36
## 16 GO:0050673 epithelial … 36/701 481/18… 5.55e- 5 4.77e-3 4.00e-3 ERRFI… 36
## 17 GO:0042060 wound heali… 37/701 442/18… 3.69e- 6 1.09e-3 9.13e-4 S100A… 37
## 18 GO:0001667 ameboidal-t… 37/701 492/18… 3.97e- 5 3.79e-3 3.18e-3 TGFB2… 37
## 19 GO:0006979 response to… 41/701 434/18… 4.37e- 8 3.16e-5 2.65e-5 DHCR2… 41
## 20 GO:0045785 positive re… 47/701 484/18… 1.82e- 9 4.60e-6 3.86e-6 TNFRS… 47
## # … with abbreviated variable names ¹​GeneRatio, ²​p.adjust
## # A tibble: 20 Ă— 9
## ID Description GeneR…¹ BgRatio pvalue p.adj…² qvalue geneID Count
## <chr> <fct> <chr> <chr> <dbl> <dbl> <dbl> <chr> <int>
## 1 GO:0050900 leukocyte mi… 42/1108 398/18… 1.66e-4 9.03e-3 7.56e-3 NBL1/… 42
## 2 GO:0030198 extracellula… 43/1108 318/18… 2.67e-7 1.35e-4 1.13e-4 ADAMT… 43
## 3 GO:1901214 regulation o… 43/1108 325/18… 4.89e-7 1.81e-4 1.51e-4 CASP9… 43
## 4 GO:0050878 regulation o… 43/1108 390/18… 5.13e-5 4.59e-3 3.84e-3 VAV3/… 43
## 5 GO:0070997 neuron death 44/1108 368/18… 5.60e-6 1.03e-3 8.66e-4 CASP9… 44
## 6 GO:0001558 regulation o… 44/1108 422/18… 1.54e-4 8.63e-3 7.23e-3 CLSTN… 44
## 7 GO:0045936 negative reg… 45/1108 440/18… 2.02e-4 9.89e-3 8.28e-3 ERRFI… 45
## 8 GO:0006631 fatty acid m… 46/1108 400/18… 9.70e-6 1.57e-3 1.31e-3 FABP3… 46
## 9 GO:0032102 negative reg… 46/1108 436/18… 8.33e-5 6.24e-3 5.23e-3 NBL1/… 46
## 10 GO:0008544 epidermis de… 47/1108 362/18… 2.55e-7 1.35e-4 1.13e-4 ERRFI… 47
## 11 GO:0001655 urogenital s… 48/1108 360/18… 8.43e-8 6.68e-5 5.59e-5 CASP9… 48
## 12 GO:0031589 cell-substra… 48/1108 369/18… 1.80e-7 1.25e-4 1.04e-4 LAMB3… 48
## 13 GO:0001503 ossification 48/1108 429/18… 1.30e-5 1.81e-3 1.51e-3 ID3/R… 48
## 14 GO:0052547 regulation o… 49/1108 459/18… 3.67e-5 3.55e-3 2.97e-3 CASP9… 49
## 15 GO:0016049 cell growth 50/1108 493/18… 1.13e-4 7.79e-3 6.52e-3 CLSTN… 50
## 16 GO:0050673 epithelial c… 51/1108 481/18… 3.07e-5 3.21e-3 2.69e-3 ERRFI… 51
## 17 GO:0006979 response to … 52/1108 434/18… 7.56e-7 2.33e-4 1.95e-4 DHCR2… 52
## 18 GO:0001667 ameboidal-ty… 52/1108 492/18… 2.80e-5 2.99e-3 2.50e-3 S100A… 52
## 19 GO:0042060 wound healing 56/1108 442/18… 4.31e-8 4.78e-5 4.00e-5 VAV3/… 56
## 20 GO:0045785 positive reg… 61/1108 484/18… 1.30e-8 3.23e-5 2.70e-5 RUNX3… 61
## # … with abbreviated variable names ¹​GeneRatio, ²​p.adjust
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
metadata_mm10 <- mm10_scRNA@meta.data
metadata_mm10$cluster_name <- factor(mm10_scRNA@active.ident)
mm10_scRNA_pseudo <- SingleCellExperiment(assays=list(counts=mm10_scRNA@assays$RNA@counts),
colData=metadata_mm10)
summed_counts_mm10 <- aggregateAcrossCells(mm10_scRNA_pseudo,
ids=colData(mm10_scRNA_pseudo)[,c("cluster_name","drug_resistance","subtype")], use.assay.type="counts")
summed_counts_mm10
## class: SingleCellExperiment
## dim: 27998 55
## metadata(0):
## assays(1): counts
## rownames(27998): Xkr4 Gm1992 ... Vmn2r122 CAAA01147332.1
## rowData names(0):
## colnames: NULL
## colData names(16): orig.ident nCount_RNA ... subtype ncells
## reducedDimNames(0):
## mainExpName: NULL
## altExpNames(0):
# Remove combinations with few cells
summed_counts_filt_mm10 <- summed_counts_mm10[,summed_counts_mm10$ncells>=10]
# We will use this wrapper function from the scatter package, that combines all steps required for differential expression analysis with edgeR
DE_subtype_mm10 <- pseudoBulkDGE(summed_counts_filt_mm10,
label=summed_counts_filt_mm10$cluster_name,
design=~drug_resistance+subtype, # Effect of subtype, accounting for drug resistance
coef="subtypeTriple_negative",
condition=summed_counts_filt_mm10$subtype
)
DE_subtype_mm10_genes <- list()
for (i in 1:length(DE_subtype_mm10)){
filtered_df <- DE_subtype_mm10[[i]] %>%
as.data.frame() %>%
#dplyr::filter(abs(logFC)>1 & FDR<0.01) %>%
mutate(cluster_name=names(DE_subtype_mm10)[i],
gene=rownames(.))
DE_subtype_mm10_genes[[i]] <- filtered_df
}
# Same for drug resistance effect
DE_drug_r_mm10 <- pseudoBulkDGE(summed_counts_filt_mm10,
label=summed_counts_filt_mm10$cluster_name,
design=~subtype + drug_resistance, # Effect drug resistance, accounting for subtype
coef="drug_resistanceSensitive",
condition=summed_counts_filt_mm10$drug_resistance
)
DE_drug_r_mm10_genes <- list()
for (i in 1:length(DE_drug_r_mm10)){
filtered_df <- DE_drug_r_mm10[[i]] %>%
as.data.frame() %>%
#dplyr::filter(abs(logFC)>0.5 & FDR<0.05) %>%
mutate(cluster_name=names(DE_drug_r_mm10)[i],
gene=rownames(.))
DE_drug_r_mm10_genes[[i]] <- filtered_df
}
plots_subtype_mm10 <- list()
for (i in 1:length(DE_subtype_mm10_genes)){
df = DE_subtype_mm10_genes[[i]]
df[is.na(df)] <- 0
df$diffexpressed <- "NO"
df$diffexpressed[df$logFC > 1 & df$FDR < 0.01] <- "UP"
df$diffexpressed[df$logFC < -1 & df$FDR < 0.01] <- "DOWN"
p <- ggplot(data=df, aes(x=logFC, y=-log10(FDR), col=diffexpressed)) +
geom_point() +
theme_minimal() +
scale_color_manual(values=volcano_colors) +
ggtitle(paste0(unique(df$cluster_name))) +
ylim(0,15)
plots_subtype_mm10[[i]] <-p
}
ggarrange(plotlist=plots_subtype_mm10, legend.grob = legend_volcano, nrow = 1, legend="right")
plots_resistance_mm10 <- list()
for (i in 1:length(DE_drug_r_mm10_genes)){
df = DE_drug_r_mm10_genes[[i]]
df[is.na(df)] <- 0
df$diffexpressed <- "NO"
df$diffexpressed[df$logFC > 1 & df$FDR < 0.01] <- "UP"
df$diffexpressed[df$logFC < -1 & df$FDR < 0.01] <- "DOWN"
p <- ggplot(data=df, aes(x=logFC, y=-log10(FDR), col=diffexpressed)) +
geom_point() +
theme_minimal() +
scale_color_manual(values=volcano_colors) +
ggtitle(paste0(unique(df$cluster_name))) +
ylim(0,15)
plots_resistance_mm10[[i]] <-p
}
ggarrange(plotlist=plots_resistance_mm10, legend.grob = legend_volcano, nrow = 1, legend="right")
Let’s perform a functional enrichment analysis to see the functions these genes participate in:
lapply(DE_drug_r_mm10_genes, function(x){plot_GO(df=x, db="org.Mm.eg.db")})
lapply(DE_subtype_mm10_genes, function(x){plot_GO(df=x, db="org.Mm.eg.db")})
There is no significant enrichment of these genes.
sessionInfo()
## R version 4.2.2 (2022-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 22000)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=Spanish_Spain.utf8 LC_CTYPE=Spanish_Spain.utf8
## [3] LC_MONETARY=Spanish_Spain.utf8 LC_NUMERIC=C
## [5] LC_TIME=Spanish_Spain.utf8
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] ggpubr_0.5.0 patchwork_1.1.2
## [3] dplyr_1.0.10 msigdbr_7.5.1
## [5] clusterProfiler_4.6.0 org.Mm.eg.db_3.16.0
## [7] org.Hs.eg.db_3.16.0 AnnotationDbi_1.60.0
## [9] scran_1.26.0 scater_1.26.1
## [11] ggplot2_3.4.0 scuttle_1.8.1
## [13] SingleCellExperiment_1.20.0 SummarizedExperiment_1.28.0
## [15] Biobase_2.58.0 GenomicRanges_1.50.1
## [17] GenomeInfoDb_1.34.3 IRanges_2.32.0
## [19] S4Vectors_0.36.0 BiocGenerics_0.44.0
## [21] MatrixGenerics_1.10.0 matrixStats_0.63.0
## [23] SeuratObject_4.1.3 Seurat_4.3.0
## [25] miloR_1.6.0 edgeR_3.40.0
## [27] limma_3.54.0
##
## loaded via a namespace (and not attached):
## [1] scattermore_0.8 tidyr_1.2.1
## [3] bit64_4.0.5 knitr_1.41
## [5] irlba_2.3.5.1 DelayedArray_0.24.0
## [7] data.table_1.14.6 KEGGREST_1.38.0
## [9] RCurl_1.98-1.9 generics_0.1.3
## [11] ScaledMatrix_1.6.0 cowplot_1.1.1
## [13] RSQLite_2.2.19 shadowtext_0.1.2
## [15] RANN_2.6.1 future_1.29.0
## [17] bit_4.0.5 enrichplot_1.18.1
## [19] spatstat.data_3.0-0 httpuv_1.6.6
## [21] assertthat_0.2.1 viridis_0.6.2
## [23] xfun_0.35 jquerylib_0.1.4
## [25] babelgene_22.9 evaluate_0.18
## [27] promises_1.2.0.1 fansi_1.0.3
## [29] igraph_1.3.5 DBI_1.1.3
## [31] htmlwidgets_1.5.4 spatstat.geom_3.0-3
## [33] purrr_0.3.5 ellipsis_0.3.2
## [35] backports_1.4.1 deldir_1.0-6
## [37] sparseMatrixStats_1.10.0 vctrs_0.5.1
## [39] ROCR_1.0-11 abind_1.4-5
## [41] cachem_1.0.6 withr_2.5.0
## [43] ggforce_0.4.1 HDO.db_0.99.1
## [45] progressr_0.11.0 sctransform_0.3.5
## [47] treeio_1.22.0 goftest_1.2-3
## [49] cluster_2.1.4 DOSE_3.24.2
## [51] ape_5.6-2 lazyeval_0.2.2
## [53] crayon_1.5.2 spatstat.explore_3.0-5
## [55] labeling_0.4.2 pkgconfig_2.0.3
## [57] tweenr_2.0.2 nlme_3.1-160
## [59] vipor_0.4.5 rlang_1.0.6
## [61] globals_0.16.2 lifecycle_1.0.3
## [63] miniUI_0.1.1.1 downloader_0.4
## [65] rsvd_1.0.5 polyclip_1.10-4
## [67] lmtest_0.9-40 Matrix_1.5-1
## [69] aplot_0.1.9 carData_3.0-5
## [71] zoo_1.8-11 beeswarm_0.4.0
## [73] ggridges_0.5.4 png_0.1-7
## [75] viridisLite_0.4.1 bitops_1.0-7
## [77] gson_0.0.9 KernSmooth_2.23-20
## [79] Biostrings_2.66.0 blob_1.2.3
## [81] DelayedMatrixStats_1.20.0 stringr_1.4.1
## [83] qvalue_2.30.0 parallelly_1.32.1
## [85] spatstat.random_3.0-1 rstatix_0.7.1
## [87] gridGraphics_0.5-1 ggsignif_0.6.4
## [89] beachmat_2.14.0 scales_1.2.1
## [91] memoise_2.0.1 magrittr_2.0.3
## [93] plyr_1.8.8 ica_1.0-3
## [95] zlibbioc_1.44.0 compiler_4.2.2
## [97] scatterpie_0.1.8 dqrng_0.3.0
## [99] RColorBrewer_1.1-3 fitdistrplus_1.1-8
## [101] cli_3.4.1 XVector_0.38.0
## [103] listenv_0.8.0 pbapply_1.6-0
## [105] MASS_7.3-58.1 tidyselect_1.2.0
## [107] stringi_1.7.8 highr_0.9
## [109] yaml_2.3.6 GOSemSim_2.24.0
## [111] BiocSingular_1.14.0 locfit_1.5-9.6
## [113] ggrepel_0.9.2 grid_4.2.2
## [115] sass_0.4.4 fastmatch_1.1-3
## [117] tools_4.2.2 future.apply_1.10.0
## [119] parallel_4.2.2 rstudioapi_0.14
## [121] bluster_1.8.0 metapod_1.6.0
## [123] gridExtra_2.3 farver_2.1.1
## [125] Rtsne_0.16 ggraph_2.1.0
## [127] digest_0.6.30 shiny_1.7.3
## [129] Rcpp_1.0.9 car_3.1-1
## [131] broom_1.0.1 later_1.3.0
## [133] RcppAnnoy_0.0.20 httr_1.4.4
## [135] colorspace_2.0-3 tensor_1.5
## [137] reticulate_1.26 splines_4.2.2
## [139] uwot_0.1.14 yulab.utils_0.0.5
## [141] statmod_1.4.37 tidytree_0.4.1
## [143] spatstat.utils_3.0-1 graphlayouts_0.8.4
## [145] sp_1.5-1 ggplotify_0.1.0
## [147] plotly_4.10.1 xtable_1.8-4
## [149] jsonlite_1.8.3 ggtree_3.6.2
## [151] tidygraph_1.2.2 ggfun_0.0.9
## [153] R6_2.5.1 pillar_1.8.1
## [155] htmltools_0.5.3 mime_0.12
## [157] glue_1.6.2 fastmap_1.1.0
## [159] BiocParallel_1.32.1 BiocNeighbors_1.16.0
## [161] codetools_0.2-18 fgsea_1.24.0
## [163] utf8_1.2.2 lattice_0.20-45
## [165] bslib_0.4.1 spatstat.sparse_3.0-0
## [167] tibble_3.1.8 ggbeeswarm_0.6.0
## [169] leiden_0.4.3 gtools_3.9.3
## [171] GO.db_3.16.0 survival_3.4-0
## [173] rmarkdown_2.18 munsell_0.5.0
## [175] GenomeInfoDbData_1.2.9 reshape2_1.4.4
## [177] gtable_0.3.1